In [1]:
import graphlab
In [2]:
# Build an SFrame from the 'home_data.gl/' location. The path is relative, so
# it is resolved against the notebook's current working directory.
sf = graphlab.SFrame('home_data.gl/')
In [3]:
# `sales` is a second name for the same SFrame object as `sf` (no copy is made).
sales = sf
In [4]:
# Bare expression: the notebook renders `sales` as this cell's output.
sales
Out[4]:
In [5]:
# Open the default visualisation of `sales`.
# NOTE(review): this runs before the canvas target is switched to 'ipynb' in a
# later cell, so it presumably renders in the default target rather than
# inline -- confirm whether that ordering is intended.
sales.show()
In [6]:
# Switch the GraphLab Canvas target to 'ipynb' so later .show() calls use it.
graphlab.canvas.set_target('ipynb')
In [7]:
# Scatter plot of sale price against living-area square footage.
sales.show(
    view='Scatter Plot',
    x='sqft_living',
    y='price'
)
In [9]:
# Randomly partition the sales rows using fraction 0.8; the fixed seed keeps
# the split repeatable across runs.
train_dataset, test_dataset = sales.random_split(0.8, seed=0)
In [10]:
# Linear regression of 'price' using 'sqft_living' as the only feature,
# trained on the training partition.
sqft_model = graphlab.linear_regression.create(
    train_dataset,
    target='price',
    features=['sqft_living']
)
In [11]:
# Mean of the 'price' column in the test partition.
print(test_dataset['price'].mean())
In [12]:
# Evaluate the single-feature model on the test partition and print the result.
print(sqft_model.evaluate(test_dataset))
In [16]:
import matplotlib.pyplot as plt
%matplotlib inline
In [17]:
# Two series on one axes: actual test prices as points ('.') and the
# single-feature model's predictions as a line ('-'), both against sqft_living.
plt.plot(
    test_dataset['sqft_living'], test_dataset['price'], '.',
    test_dataset['sqft_living'], sqft_model.predict(test_dataset), '-'
)
Out[17]:
In [19]:
# Look up the model's 'coefficients' field; the result is shown as cell output.
sqft_model.get('coefficients')
Out[19]:
In [21]:
# Column names used as inputs for the multi-feature model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
In [22]:
# Visualise only the columns listed in `my_features`.
sales[my_features].show()
In [24]:
# Box-and-whisker view of price grouped by zipcode.
sales.show(
    view='BoxWhisker Plot',
    x='zipcode',
    y='price'
)
In [25]:
# Linear regression of 'price' on every column named in `my_features`,
# trained on the same training partition as the single-feature model.
my_features_model = graphlab.linear_regression.create(
    train_dataset,
    target='price',
    features=my_features
)
In [26]:
# Test-set evaluation of both models, printed back to back for comparison:
# the single-feature model first, then the multi-feature model.
print(sqft_model.evaluate(test_dataset))
print(my_features_model.evaluate(test_dataset))
In [27]:
# Keep only the row(s) of `sales` whose 'id' equals '5309101200'.
# The id is compared as a string, not as a number.
house1 = sales[sales['id'] == '5309101200']
In [28]:
# Bare expression: render the selected row(s) as this cell's output.
house1
Out[28]:
In [29]:
# Recorded sale price for house1.
print(house1['price'])
In [30]:
# Single-feature model's prediction for house1.
print(sqft_model.predict(house1))
In [32]:
# Multi-feature model's prediction for house1.
print(my_features_model.predict(house1))
In [33]:
# Keep only the row(s) of `sales` whose 'id' equals '1925069082'.
# The id is compared as a string, not as a number.
house2 = sales[sales['id'] == '1925069082']
In [34]:
# Bare expression: render the selected row(s) as this cell's output.
house2
Out[34]:
In [35]:
# Recorded sale price for house2.
print(house2['price'])
In [36]:
# Single-feature model's prediction for house2.
print(sqft_model.predict(house2))
In [37]:
# Multi-feature model's prediction for house2.
print(my_features_model.predict(house2))
In [38]:
# It was Bill Gates's house.
In [39]:
# Keep only the row(s) of `sales` whose 'id' equals '5309101200'.
# The comparison has to be made against the column, sales['id']. Writing
# ['id'] == '5309101200' compares a plain Python list with a str, which
# Python evaluates to the constant False before the SFrame is ever indexed,
# so no per-row filter is built.
house_temp = sales[sales['id'] == '5309101200']
In [40]:
# Bare expression: render `house_temp` as this cell's output.
house_temp
Out[40]:
In [ ]: